/*** ^^A -*-C++-*- **********************************************/
/*  au_if_nus                23.05.2019                         */
/****************************************************************/
/*  Short Description :                                         */
/*  Automated NUS for interferogram experiments.                */
/****************************************************************/
/*  Keywords :                                                  */
/*  zg, NUS                                                     */
/****************************************************************/
/*  Description/Usage :                                         */
/*  Automated NUS for interferogram experiments.                */
/*  1D spectrum must be accessible as 2nd experiment (edc2).    */
/*  Requires Python script sampling_simulatorXX.py, and a       */
/*  suitable external CPython environment.                      */
/****************************************************************/
/*  Author(s) :                                                 */
/*  Name    : Geoffrey Akien                                    */
/*  Organisation  : Lancaster University                        */
/*  Email    : g.akien@lancaster.ac.uk                          */
/****************************************************************/
/*  Name   Date    Modification:                                */
/*  gra    190523  created                                      */
/****************************************************************/

//Verbosity / safety switch used throughout local_au:
//  0 - silent
//  1 - additional reporting via printf
//  2 - as 1, but a dry run: the final acquisition step is skipped
#define DEBUG 0

//define your Python command here
//NB numpy and scipy must be installed with pip beforehand
//Both paths below are checked with access() before anything else is done.
//#define PYTHON_PATH "python" //even if on the path, not recognised by TopSpin
#define PYTHON_PATH "C:/Python27/python.exe" //for vanilla python 2.7
#define PYTHON_SCRIPT "C:/Python27/Scripts/sampling_simulator3b.py"

//input/output files for the Python script (ACQUPATH)
//PYTHON_MULTIPLETS is written by this AU program and passed to the script;
//PYTHON_NUSLIST is read back afterwards and copied to the vclist directory.
#define PYTHON_MULTIPLETS "multiplets1.txt"
#define PYTHON_NUSLIST "best_sampling.txt"

//system-specific GRPDLY - a workaround until we come up with something better
//Only used when the interferogram experiment has DIGMOD == 3 (baseopt) but
//the 1D preparation experiment does not.
#define DEFAULT_GRPDLY 76
  

//AU main body: all the work is delegated to local_au(), whose return value
//is stored in AUERR before the QUIT macro is reached.
//NOTE(review): QUIT is presumably the standard TopSpin macro that ends the
//AU program and hands AUERR back to TopSpin - confirm against the AU manual.
AUERR = local_au(curdat);
QUIT

//lcUtil needed for the "SortDouble" function, so we can calculate the median
#include <inc/lcUtil>

int local_au(const char* curdat)
{
  /* declare variables */
  //general variables
  char text[PATH_MAX], py_cmd[PATH_MAX] = PYTHON_PATH;
  char disk_save[PATH_MAX], user_save[PATH_MAX], name_save[PATH_MAX];
  char peaklist[PATH_MAX];
  char source_int1d[PATH_MAX], destination_multiplets_name[PATH_MAX] = PYTHON_MULTIPLETS, destination_multiplets[PATH_MAX];
  char source_nuslist[PATH_MAX], destination_nuslist_name[PATH_MAX] = PYTHON_NUSLIST, destination_nuslist[PATH_MAX];
  int expno_save = expno, procno_save = procno, prep_digmod, PH_mod_status = -1, FT_mod_status = -1;
  
  //for preparation experiment
  int i, numPeaks, numFreq, parmode;
  float lb_status, prep_offset, prep_HZpPT, prep_PPMpPT, prep_aq, prep_swh, prep_swh1;
  float leftLimit, rightLimit, ppmFromLeft, multipletCentre, pointsfromLeft, peakArea;
  double prep_grpdly, prep_sf, prep_sw_p, f1p, f2p;
  double medianPeakWidth_Hz, medianPeakWidth;

  //experimental parameters
  double nusamount, if_grpdly;
  float if_swh, if_swh1;
  int if_digmod, chunksize, totalchunks, totalpoints, actualchunks;


  //quick check for existing of Python files - pointless if they don't exist!
  //https://stackoverflow.com/questions/230062/whats-the-best-way-to-check-if-a-file-exists-in-c
  if(access(PYTHON_PATH, F_OK) == -1)
    STOPMSG("Could not locate Python exe.") 

  if(access(PYTHON_SCRIPT, R_OK) == -1)
    STOPMSG("Could not locate Python script.")
    
  //optional flags
  int proc_flag = 1;
  
  if (i_argc > 2 && strcmp(i_argv[2], "noproc") == 0)
    proc_flag = 0;

  FETCHPAR("PARMODE", &parmode)
  
  if (parmode == 0)
    STOPMSG("Not compatible with 1D sequences.")

  //save parameters for current dataset so we can navigate back to it later
  strcpy(disk_save, disk);
  strcpy(user_save, user);
  strcpy(name_save, name);

  if (DEBUG > 0)
  {
	  printf("Parent dataset:\n");
	  printf("disk: %s\n", disk_save);
	  printf("user: %s\n", user_save);
	  printf("name: %s\n", name_save);
    printf("expno: %d\n", expno_save);
	  printf("procno: %d\n\n", procno_save);
  }


  /***********************************************************************************************************************/
  /* Navigate to 1D dataset, pick peaks and integrate so we can define multiplets later */
  /***********************************************************************************************************************/

  //cf au_getl1d
  //navigate to 2nd dataset - must be defined in edc2, e.g. as a "projection" in IconNMR
  //attempts to populates name2, expno2, procno2, disk2, user2 (from curdat2) so we can use it later
  GETCURDATA2

  //GETCURDATA2 fails if curdat2 has not been generated
  if (AUERR < 0)
    STOPMSG("Can't get CURDATA2 parameters.")

  ERRORABORT

  //navigate to 2nd dataset
  if (DEBUG > 0)
  {
	  printf("2nd (preparation) dataset for calculation of optimised NUS schedule:\n");
	  printf("disk2: %s\n", disk2);
	  printf("user2: %s\n", user2);
	  printf("name2: %s\n", name2);
	  printf("expno2: %d\n", expno2);
	  printf("procno2: %d\n\n", procno2);
  }

  DATASET(name2, expno2, procno2, disk2, user2)

  FETCHPARS("PH_mod", &PH_mod_status)
  FETCHPARS("FT_mod", &FT_mod_status)
  FETCHPARS("LB", &lb_status)
  FETCHPARS("DIGMOD", &prep_digmod)
  
  //only makes sense if both the 1D and the PSYCHE are with baseopt
  FETCHPARS("GRPDLY", &prep_grpdly)
  
  //if it wasn't processed to our satiprep_sfication yet...
  if (PH_mod_status == 0 && FT_mod_status == 0)
  {
	  if (DEBUG > 0)
	    printf("1D data set was not FT'd or phased - doing now...\n\n");
	
	  STOREPAR("LB", 0.0)
	
	  EF
	  ERRORABORT
	
    if (prep_digmod == 3) //baseopt
      APK0
    else
      APK
  }

  //option to skip this with the "noproc" argument - gives a bit more flexibilty for manual multiplet picking
  if (proc_flag == 1)
  {
    if (lb_status != 0.0)
    { 
      //this gives better behaviour for more complicated multiplets
      //improves the ability to pick all the peaks, especially those at the edges of the multiplet
      STOREPAR("LB", 0.0)
  
      //usually the inherent linewidth is limiting, so no further improvements
      //obtainable with resolution-enhancing window functions

	    EFP
    }

    /***********************************************************************************************************************/
    /* peak-picking */
    /* cf proc_1d */
    /* define the plot region as big as the complete acquisition region */
    // necessary because the AU program "PP" actually behaves more like ppf does when used at the command-line
    FETCHPARS("OFFSET", &prep_offset)
    FETCHPARS("SW_p" ,&prep_sw_p)
    FETCHPARS("SF", &prep_sf)
    f1p = prep_offset;
    f2p = f1p - prep_sw_p / prep_sf;
    STOREPAR("F1P", f1p)
    STOREPAR("F2P", f2p)
 
    //standard values
    STOREPAR("CY", 15.0) //defined intensity of largest peak
    STOREPAR("MAXI", 10000.0)
    STOREPAR("PC", 1.0) //S/N threshold for peak-picking (criterion never invoked if MI > 0)
    STOREPAR("PSIGN", 0)
    STOREPAR("PSCAL", 4) //MI criterion is relative to non-solvent peaks, and will peak pick solvent peaks too 
  
    //key values to get it working nicely for this application
    //threshold to ignore peaks i.e. peaks smaller than MI/CY are ignored
    STOREPAR("MI", 0.8)

    if (DEBUG > 0)
      printf("Peak-picking...\n");
  
    //pick peaks
    PP
    
    //stores it into peaklist format, which includes the peak widths we're interested in
    PPP

    /***********************************************************************************************************************/
    //integration
    //much harder to get right
    STOREPAR("ABSL", 50.0)  //the ~S/N required to define a region as containing signal
    STOREPAR("AZFW", 18.0 / prep_sf); //minimum seperation between integrals
    STOREPAR("AZFE", 0.0) //additional padding of the integrals
    STOREPAR("ISEN", 32.0) //dynamic range limits - avoids integration of 13C-satellites and impurities

    if (DEBUG > 0)
      printf("Identifying multiplets...\n\n");

    ABS
  }
  else
  {
	  //convert whatever we had before
	  XCMD("sendgui convertpeaklist peaklist")
  }
  
  //ugly hack to count how many peaks we've got, and we can initialise appropriately-sized arrays later
  numPeaks = readPeakList(PROCPATH(0));
	freePeakList();

  //we need this for converting the frequencies to points later
  FETCHPARS("AQ", &prep_aq)
  FETCHPARS("SWH", &prep_swh)
  FETCHPARS("HZpPT", &prep_HZpPT)
  prep_PPMpPT = prep_HZpPT / prep_sf;
  
  //gives us a more useful plain text int1d that we can work with
  //(also writes to binary files int and intgap)
  STOREPAR("CURPRIN", "") //optional additional output - same as int1d - not required
  LI

  //define the paths for the source peaklists and integration regions
  strcpy(peaklist, PROCPATH("peaklist"));
  strcpy(source_int1d, PROCPATH("int1d"));

  if (DEBUG > 0)
  {
    printf("Source for peaklist: %s\n", peaklist);
    printf("Source for integrations: %s\n\n", source_int1d);
  }

  /***********************************************************************************************************************/ 
  /***********************************************************************************************************************/


  /***********************************************************************************************************************/
  /***********************************************************************************************************************/
  /* Convert files to the correct format, generate sample schedule, then acquire */

  //navigate back to main dataset
  DATASET(name_save, expno_save, procno_save, disk_save, user_save)

  if (DEBUG > 0)
    printf("Navigating back to main dataset\n\n");
  

  /***********************************************************************************************************************/
  /* Peaklist first */

  if (DEBUG > 0) 
	  printf("Calculating nominal peak width (%d peaks) for Python script...\n", numPeaks);

  //opens peaklist file if it can
  FILE *peaklist_ptr = fopen(peaklist, "r"); 

  if (peaklist_ptr == NULL)
    STOPMSG("Could not access peaklist file.")
  
  //first two lines can be discarded
  //(note that Notepad++ unhelpfully handles the Windows double line terminators as two separate terminations)
  //(for once, Notepad displays the correct output)
  //H
  //#frequency    half width   %gauss/100.
  //this peak width is the number of processed points wide, so is not a precise measurement
  for (i = 0; i < 2; i++)
  {
	  //reads up to PATH_MAX, or the null ptr, or the end of line
    fgets(text, PATH_MAX, peaklist_ptr);
  }
  
  double peakWidth[numPeaks];
  double cumulativePeakWidth = 0.0, meanPeakWidth_Hz, meanPeakWidth;
  int sortedIndex[numPeaks];

  for (i = 0; i < numPeaks; i++)
  {
  	//reads up to PATH_MAX, or the null ptr, or the end of line
    fgets(text, PATH_MAX, peaklist_ptr);
    
	  //parse the line - format is:
    //3695.271       2.90            0.0
    //we only need the width - the * discards the frequency
    //absence of the 3rd number means its ignored
    sscanf(text, "%*f %lf", &peakWidth[i]);
    
    //for if you want the mean...
    cumulativePeakWidth += peakWidth[i];
  }

  //close the file
  fclose(peaklist_ptr);
  
  //sort the peaks by frequency
  SortDouble(numPeaks, sortedIndex, peakWidth);
  
  if (numPeaks % 2 == 0) //even
  	medianPeakWidth_Hz = (peakWidth[sortedIndex[numPeaks / 2]] + peakWidth[sortedIndex[numPeaks / 2 - 1]]) / 2;
  else //odd
    medianPeakWidth_Hz = peakWidth[sortedIndex[(numPeaks + 1) / 2]];
    
  //...and now convert it to the preferred format for the Python script
  //Hz * pi * AQ / spectral width
  medianPeakWidth = medianPeakWidth_Hz * M_PI * prep_aq / prep_swh;
    
  if (DEBUG > 0)
    printf("Median (nominal) peak width: %.2f Hz (%.9f)\n", medianPeakWidth_Hz, medianPeakWidth);
  
  //...or the mean
  meanPeakWidth_Hz = cumulativePeakWidth / numPeaks;
  meanPeakWidth = meanPeakWidth_Hz * M_PI * prep_aq / prep_swh;
  
  if (DEBUG > 0)
    printf("Mean (nominal) peak width: %.2f Hz (%.9f) (currently unused)\n", meanPeakWidth_Hz, meanPeakWidth);
    
  if (DEBUG > 0)
    printf("\n");
  

  /***********************************************************************************************************************/
  /* Multiplets next */

  if (DEBUG > 0)
	  printf("Preparing multiplet list (%s) for Python script...\n", destination_multiplets_name);

  /* path for destination file */
  strcpy(destination_multiplets, ACQUPATH(destination_multiplets_name));

  if (DEBUG > 0)
    printf("Destination: %s\n", destination_multiplets);

  //opens integration file if it can
  FILE *int1d_ptr = fopen(source_int1d, "r"); 

  if (int1d_ptr == NULL)
    STOPMSG("Could not access int1d file.")
  
  //...and its destination
  //"w" will overwrite with an empty file if it already exists
  FILE *multiplets_ptr = fopen(destination_multiplets, "w"); 

  if (multiplets_ptr == NULL)
    STOPMSG("Could not generate new multiplets1.txt file.")
  
  //first five lines can be discarded:
  //   Current data set:
  //   NAME =_home_nmr700_tt_oo_fid01.fid   EXPNO =     1   PROCNO =     3
  //   DATPATH = \\lancs\homes\29\akien\Downloads
  //
  //   Number   Integrated Region     Integral
  for (i = 0; i < 5; i++)
  {
	  //reads up to PATH_MAX, or the null ptr, or the end of line
    fgets(text, PATH_MAX, peaklist_ptr);
  }

  numFreq = 0;

  /* read a line - keep going until there's nothing left */
  while (fgets(text, PATH_MAX, peaklist_ptr) != NULL )
  {
	  //read the line - format:
    //     1      5.313      5.253          5.20211 
    sscanf(text, "%*d %f %f %f", &leftLimit, &rightLimit, &peakArea); //ignore the first number with *
  
    //we want to convert everything into number of points (from the left)
    multipletCentre = (leftLimit + rightLimit) * 0.5;
    ppmFromLeft = prep_offset - multipletCentre;
    pointsfromLeft = ppmFromLeft / prep_PPMpPT;
  
    //write the centre of the multiplet, and its intensity
    fprintf(multiplets_ptr, "%.2f\t%.2f\n", pointsfromLeft, peakArea);
    
    numFreq += 1;
  }

  //close the files
  fclose(int1d_ptr);
  fclose(multiplets_ptr);
  
  if (DEBUG > 0)
    printf("Detected %d frequencies.\n\n", numFreq);

  /***********************************************************************************************************************/
  /***********************************************************************************************************************/


  /***********************************************************************************************************************/
  /***********************************************************************************************************************/
  /* Generate the schedule */
  
   /* If using the automatic scheduler (NUSLIST "automatic"): */
  /* x TD is the number of reconstructed increments */
  /* NusAMOUNT is the fraction of increments to acquire */
  /* "automatic" is a vclist that gets generated automatically */
  /* ...that has NusPOINTS increments */
  
  /* If using a custom schedule (NUSLIST "mySchedule"): */
  /* The number of acquired points appears to be controlled by the vclist alone */
  /* i.e. NusAMOUNT is ignored */  

  //need to pass along various parameters to the Python script
  FETCHPAR("NusAMOUNT", &nusamount)
  FETCHPAR("SWH", &if_swh)
  FETCHPARN(1, "SWH", &if_swh1)
  
  //group delay handling
  FETCHPAR("DIGMOD", &if_digmod)
  
  //if both are baseopt...
  if (if_digmod == 3 && prep_digmod == 3)
    if_grpdly = prep_grpdly;
  else if (if_digmod == 3)
    if_grpdly = DEFAULT_GRPDLY;
  else
    if_grpdly = 0; //no idea what to do here if not baseopt
  
  //in complex points - for Morris sequences, should be an integer - this converts for us
  //https://www.nmr.chemistry.manchester.ac.uk/?q=node/426
  //2*sw/sw1 + 2*GRPDLY
  //chunksize = 2 * if_swh / if_swh1;
  chunksize = 2 * ((if_swh / if_swh1) + if_grpdly);
  
  //(assuming uniform sampling)
  FETCHPARN(1, "TD", &totalchunks)

  totalpoints = totalchunks * chunksize;

  if (DEBUG > 0)
  {
    printf("Experimental parameters:\n");
    printf("SWH: %.2f Hz\n", if_swh);
    printf("1 SWH: %.2f Hz\n", if_swh1);
    printf("Assumed GRPDLY: %d\n", if_grpdly);
    printf("Chunk size: %d points\n", chunksize);
    printf("Number of chunks (1 TD): %d\n", totalchunks);
    printf("Number of points in reconstructed FID (excluding 2*GRPDLY): %d\n", totalpoints);
    printf("\n");
  }

  //prepare the command
  sprintf(py_cmd, "%s %s %s %.19f %d %d %.2f",
                   PYTHON_PATH, PYTHON_SCRIPT, destination_multiplets, medianPeakWidth, totalpoints, chunksize, nusamount/100);

  if (DEBUG > 0)
  {
    printf("Generating schedule...\n");
    printf("NUS amount: %.2f%%\n", nusamount);
    printf(py_cmd);
    printf("\n\n");
  }
  
  Show_status("Calculating optimal schedule...");
  
  //generates a sample schedule into "best_sampling.txt"
  
  //undocumented, but unfortunately not possible since Jython doesn't support numpy/scipy
  //XPY("sampling_simulator3b.py") //needs arguments
  
  //requires external Cython environment 
  //For reasons I don't understand, using TopSpin 3.6 in W7 results in the print statements in this program
  //getting dumped to the top of stdout, rather than inline with the rest of the TopSpin output (W10 didn't do this)
  system(py_cmd);
  
  if (DEBUG > 0)
    printf("\n");
  

  /***********************************************************************************************************************/
  /* Copy the schedule into the right place */

  if (DEBUG > 0)
    printf("NUS list name: %s\n", destination_nuslist_name);

  //must change the default NUSLIST from "automatic" or calls to ZG will invoke the internal sample scheduler
  STOREPAR("NUSLIST", destination_nuslist_name)

  //source
  strcpy(source_nuslist, ACQUPATH(destination_nuslist_name));

  if (DEBUG > 0)
    printf("NUS source path name: %s\n", source_nuslist);

  //the NUSLIST that gets used is actually pulled from the vclists folder, so we need to copy that over
  getParfileDirForWrite(destination_nuslist_name, VC_DIRS, destination_nuslist);

  if (DEBUG > 0)
  {
    printf("NUSLIST destination: %s\n\n", destination_nuslist); 
    printf("Schedule:\n");
  }
  
  //open the files
  //opens nuslist file if it can
  FILE *source_nuslist_ptr = fopen(source_nuslist, "r"); 

  if (source_nuslist_ptr == NULL)
    STOPMSG("Could not access source NUS list file.")
  
  //...and its destination
  //"w" will overwrite with an empty file if it already exists
  FILE *destination_nuslist_ptr = fopen(destination_nuslist, "w"); 

  if (destination_nuslist_ptr == NULL)
    STOPMSG("Could not generate new NUS list file.")
    
  actualchunks = 0;
  
  /* read a line - keep going until there's nothing */
  while (fgets(text, PATH_MAX, source_nuslist_ptr) != NULL )
  {
    //write the line back
    fputs(text, destination_nuslist_ptr);
    
    actualchunks += 1;
    
    if (DEBUG > 0)
      printf(text);
  }

  //close the files
  fclose(source_nuslist_ptr);
  fclose(destination_nuslist_ptr);
  
  if (DEBUG > 0)
    printf("Actual chunks to be acquired: %d\n\n", actualchunks);
    
  Show_status("Finished calculating optimal schedule.");


  /***********************************************************************************************************************/

  if (DEBUG > 0)
    printf("Acquiring data...\n");
 
  if (DEBUG < 2)
    XAU("au_zg", "")
  else
    printf("Dry run - did not acquire data.\n");
}